{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read Data Sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:54:53.419541Z",
     "start_time": "2017-07-16T23:54:48.975683Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import time\n",
    "from collections import namedtuple\n",
    "pd.set_option(\"display.max_rows\",35)\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:54:54.830540Z",
     "start_time": "2017-07-16T23:54:53.421660Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class dataset:\n",
    "    kdd_train_2labels = pd.read_pickle(\"dataset/kdd_train__2labels.pkl\")\n",
    "    kdd_test_2labels = pd.read_pickle(\"dataset/kdd_test_2labels.pkl\")\n",
    "    kdd_test__2labels = pd.read_pickle(\"dataset/kdd_test__2labels.pkl\")\n",
    "    \n",
    "    kdd_train_5labels = pd.read_pickle(\"dataset/kdd_train__5labels.pkl\")\n",
    "    kdd_test_5labels = pd.read_pickle(\"dataset/kdd_test_5labels.pkl\")\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:54:54.840225Z",
     "start_time": "2017-07-16T23:54:54.832600Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(25192, 124)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.kdd_train_2labels.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:54:54.847194Z",
     "start_time": "2017-07-16T23:54:54.842478Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(22544, 124)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.kdd_test_2labels.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:54:58.029202Z",
     "start_time": "2017-07-16T23:54:54.849685Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.97509982675167528"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import model_selection as ms\n",
    "from sklearn import preprocessing as pp\n",
    "\n",
    "class preprocess:\n",
    "    \n",
    "    output_columns_2labels = ['is_Normal','is_Attack']\n",
    "    \n",
    "    x_input = dataset.kdd_train_2labels.drop(output_columns_2labels, axis = 1)\n",
    "    y_output = dataset.kdd_train_2labels.loc[:,output_columns_2labels]\n",
    "\n",
    "    x_test_input = dataset.kdd_test_2labels.drop(output_columns_2labels, axis = 1)\n",
    "    y_test = dataset.kdd_test_2labels.loc[:,output_columns_2labels]\n",
    "    \n",
    "    x_test__input = dataset.kdd_test__2labels.drop(output_columns_2labels, axis = 1)\n",
    "    y_test_ = dataset.kdd_test__2labels.loc[:,output_columns_2labels]\n",
    "\n",
    "    ss = pp.StandardScaler()\n",
    "\n",
    "    x_train = ss.fit_transform(x_input)\n",
    "    x_test = ss.transform(x_test_input)\n",
    "    x_test_ = ss.transform(x_test__input)\n",
    "\n",
    "    y_train = y_output.values\n",
    "    y_test = y_test.values\n",
    "    y_test_ = y_test_.values\n",
    "\n",
    "    \n",
    "preprocess.x_train.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:55:09.004438Z",
     "start_time": "2017-07-16T23:54:58.031406Z"
    }
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.contrib.legacy_seq2seq.python.ops.seq2seq import basic_rnn_seq2seq\n",
    "from tensorflow.contrib.rnn import RNNCell, LSTMCell, MultiRNNCell\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:55:09.312070Z",
     "start_time": "2017-07-16T23:55:09.006640Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class network(object):\n",
    "    \n",
    "    input_dim = 122\n",
    "    classes = 2\n",
    "    hidden_encoder_dim = 122\n",
    "    hidden_layers = 1\n",
    "    latent_dim = 10\n",
    "\n",
    "    hidden_decoder_dim = 122\n",
    "    lam = 0.01\n",
    "    \n",
    "    def __init__(self, classes, hidden_layers, num_of_features):\n",
    "        self.classes = classes\n",
    "        self.hidden_layers = hidden_layers\n",
    "        self.latent_dim = num_of_features\n",
    "            \n",
    "    def build_layers(self):\n",
    "        tf.reset_default_graph()\n",
    "        #learning_rate = tf.Variable(initial_value=0.001)\n",
    "\n",
    "        input_dim = self.input_dim\n",
    "        classes = self.classes\n",
    "        hidden_encoder_dim = self.hidden_encoder_dim\n",
    "        hidden_layers = self.hidden_layers\n",
    "        latent_dim = self.latent_dim\n",
    "        hidden_decoder_dim = self.hidden_decoder_dim\n",
    "        lam = self.lam\n",
    "        \n",
    "        with tf.variable_scope(\"Input\"):\n",
    "            self.x_input = tf.placeholder(\"float\", shape=[None, 1, input_dim])\n",
    "            self.y_input_ = tf.placeholder(\"float\", shape=[None, 1, classes])\n",
    "            self.keep_prob = tf.placeholder(\"float\")\n",
    "            self.lr = tf.placeholder(\"float\")\n",
    "            self.x_list = tf.unstack(self.x_input, axis= 1)\n",
    "            self.y_list_ = tf.unstack(self.y_input_, axis = 1)\n",
    "            self.y_ = self.y_list_[0]\n",
    "            \n",
    "            #GO = tf.fill((tf.shape(self.x)[0], 1), 0.5)\n",
    "            \n",
    "            #y_with_GO = tf.stack([self.y_, GO])\n",
    "            \n",
    "        with tf.variable_scope(\"lstm\"):\n",
    "            multi_cell = MultiRNNCell([LSTMCell(input_dim) for i in range(hidden_layers)] )\n",
    "            \n",
    "            self.y, states = basic_rnn_seq2seq(self.x_list, self.y_list_, multi_cell)\n",
    "            #self.y = tf.slice(self.y, [0, 0], [-1,2])\n",
    "            \n",
    "            #self.out = tf.squeeze(self.y)\n",
    "            \n",
    "            #self.y = tf.layers.dense(self.y[0], classes, activation = None)\n",
    "            \n",
    "            self.y = tf.slice(self.y[0], [0, 0], [-1,2])\n",
    "            \n",
    "        with tf.variable_scope(\"Loss\"):\n",
    "            \n",
    "            self.regularized_loss = tf.losses.mean_squared_error(self.y_, self.y)\n",
    "            correct_prediction = tf.equal(tf.argmax(self.y_, 1), tf.argmax(self.y, 1))\n",
    "            self.tf_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name = \"Accuracy\")\n",
    "\n",
    "        with tf.variable_scope(\"Optimizer\"):\n",
    "            learning_rate=self.lr\n",
    "            optimizer = tf.train.AdamOptimizer(learning_rate)\n",
    "            gradients, variables = zip(*optimizer.compute_gradients(self.regularized_loss))\n",
    "            gradients = [\n",
    "                None if gradient is None else tf.clip_by_value(gradient, -1, 1)\n",
    "                for gradient in gradients]\n",
    "            self.train_op = optimizer.apply_gradients(zip(gradients, variables))\n",
    "            #self.train_op = optimizer.minimize(self.regularized_loss)\n",
    "            \n",
    "        # add op for merging summary\n",
    "        #self.summary_op = tf.summary.merge_all()\n",
    "        self.pred = tf.argmax(self.y, axis = 1)\n",
    "        self.actual = tf.argmax(self.y_, axis = 1)\n",
    "\n",
    "        # add Saver ops\n",
    "        self.saver = tf.train.Saver()\n",
    "        "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-01T00:59:00.684124Z",
     "start_time": "2017-06-01T00:58:59.843181Z"
    }
   },
   "source": [
    "batch_iterations = 200\n",
    "\n",
    "x_train, x_valid, y_train, y_valid, = ms.train_test_split(preprocess.x_train, \n",
    "                                                                          preprocess.y_train, \n",
    "                                                                          test_size=0.1)\n",
    "batch_indices = np.array_split(np.arange(x_train.shape[0]), \n",
    "                                           batch_iterations)\n",
    "                                                                          \n",
    "for i in batch_indices:\n",
    "    print(x_train[i,np.newaxis,:])\n",
    "    print(y_train[i,np.newaxis,:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-16T23:55:09.721854Z",
     "start_time": "2017-07-16T23:55:09.314465Z"
    },
    "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import collections\n",
    "\n",
    "class Train:    \n",
    "    \n",
    "    result = namedtuple(\"score\", ['epoch', 'no_of_features','hidden_layers','train_score', 'test_score', 'test_score_20', 'time_taken'])\n",
    "\n",
    "    predictions = {}\n",
    "    predictions_ = {}\n",
    "\n",
    "    results = []\n",
    "    best_acc = 0\n",
    "    best_acc_global = 0\n",
    "\n",
    "    def train(epochs, net, h,f, lrs):\n",
    "        batch_iterations = 200\n",
    "        train_loss = None\n",
    "        Train.best_acc = 0\n",
    "        os.makedirs(\"dataset/tf_lstm_nsl_kdd-orig-/hidden layers_{}_features count_{}\".format(h,f),\n",
    "                    exist_ok = True)\n",
    "        with tf.Session() as sess:\n",
    "            #summary_writer_train = tf.summary.FileWriter('./logs/kdd/VAE/training', graph=sess.graph)\n",
    "            #summary_writer_valid = tf.summary.FileWriter('./logs/kdd/VAE/validation')\n",
    "\n",
    "            sess.run(tf.global_variables_initializer())\n",
    "            start_time = time.perf_counter()\n",
    "            \n",
    "            accuracy, pred_value, actual_value, y_pred = sess.run([net.tf_accuracy, \n",
    "                                                                   net.pred, \n",
    "                                                                   net.actual, net.y], \n",
    "                                                                  feed_dict={net.x_input: preprocess.x_test[:,np.newaxis,:], \n",
    "                                                                             net.y_input_: preprocess.y_test[:,np.newaxis,:], \n",
    "                                                                             net.keep_prob:1})\n",
    "            \n",
    "            print(\"Initial Accuracy, before training: {}\".format(accuracy))\n",
    "            \n",
    "            for c, lr in enumerate(lrs):\n",
    "                for epoch in range(1, (epochs+1)):\n",
    "                    x_train, x_valid, y_train, y_valid, = ms.train_test_split(preprocess.x_train, \n",
    "                                                                              preprocess.y_train, \n",
    "                                                                              test_size=0.1)\n",
    "                    batch_indices = np.array_split(np.arange(x_train.shape[0]), \n",
    "                                               batch_iterations)\n",
    "\n",
    "                    for i in batch_indices:\n",
    "\n",
    "                        _, train_loss = sess.run([net.train_op, net.regularized_loss], #net.summary_op\n",
    "                                                              feed_dict={net.x_input: x_train[i,np.newaxis,:], \n",
    "                                                                         net.y_input_: y_train[i,np.newaxis,:], \n",
    "                                                                         net.keep_prob:1, net.lr:lr})\n",
    "                        #summary_writer_train.add_summary(summary_str, epoch)\n",
    "                        if(train_loss > 1e9):\n",
    "                            print(\"Step {} | Training Loss: {:.6f}\".format(epoch, train_loss))\n",
    "\n",
    "\n",
    "                    valid_accuracy,valid_loss = sess.run([net.tf_accuracy, net.regularized_loss], #net.summary_op \n",
    "                                                          feed_dict={net.x_input: x_valid[:,np.newaxis,:], \n",
    "                                                                     net.y_input_: y_valid[:,np.newaxis,:], \n",
    "                                                                     net.keep_prob:1, net.lr:lr})\n",
    "                    #summary_writer_valid.add_summary(summary_str, epoch)\n",
    "\n",
    "\n",
    "\n",
    "                    accuracy, pred_value, actual_value, y_pred = sess.run([net.tf_accuracy, \n",
    "                                                                   net.pred, \n",
    "                                                                   net.actual, net.y], \n",
    "                                                                  feed_dict={net.x_input: preprocess.x_test[:,np.newaxis,:], \n",
    "                                                                             net.y_input_: preprocess.y_test[:,np.newaxis,:], \n",
    "                                                                             net.keep_prob:1, net.lr:lr})\n",
    "                    \n",
    "                    accuracy_, pred_value_, actual_value_, y_pred_ = sess.run([net.tf_accuracy, \n",
    "                                                                   net.pred, \n",
    "                                                                   net.actual, net.y], \n",
    "                                                                  feed_dict={net.x_input: preprocess.x_test_[:,np.newaxis,:], \n",
    "                                                                             net.y_input_: preprocess.y_test_[:,np.newaxis,:], \n",
    "                                                                             net.keep_prob:1, net.lr:lr})\n",
    "\n",
    "                    print(\"Step {} | Training Loss: {:.6f} | Train Accuracy: {:.6f} | Test Accuracy: {:.6f}, {:.6f}\".format(epoch, train_loss, valid_accuracy, accuracy, accuracy_))\n",
    "\n",
    "                    if accuracy > Train.best_acc_global:\n",
    "                                Train.best_acc_global = accuracy\n",
    "                                Train.pred_value = pred_value\n",
    "                                Train.actual_value = actual_value\n",
    "                                Train.pred_value_ = pred_value_\n",
    "                                Train.actual_value_ = actual_value_\n",
    "                                Train.best_parameters = \"Hidden Layers:{}, Features Count:{}\".format(h, f)\n",
    "\n",
    "                    if accuracy > Train.best_acc:\n",
    "\n",
    "                        #net.saver.save(sess, \"dataset/tf_vae_only_nsl_kdd_hidden layers_{}_features count_{}\".format(epochs,h,f))\n",
    "                        #Train.results.append(Train.result(epochs, f, h,valid_accuracy, accuracy))\n",
    "                        #curr_pred = pd.DataFrame({\"Attack_prob\":y_pred[:,-2], \"Normal_prob\":y_pred[:, -1]})\n",
    "                        #Train.predictions.update({\"{}_{}_{}\".format(epochs,f,h):curr_pred})\n",
    "\n",
    "                        Train.best_acc = accuracy\n",
    "                        if not (np.isnan(train_loss)):\n",
    "                            net.saver.save(sess, \n",
    "                                       \"dataset/tf_lstm_nsl_kdd-orig-/hidden layers_{}_features count_{}/model\"\n",
    "                                       .format(h,f), \n",
    "                                       global_step = epoch, \n",
    "                                       write_meta_graph=False)\n",
    "\n",
    "                        curr_pred = pd.DataFrame({\"Attack_prob\":y_pred[:,-2], \"Normal_prob\":y_pred[:, -1], \"Prediction\":pred_value})\n",
    "                        curr_pred_ = pd.DataFrame({\"Attack_prob\":y_pred_[:,-2], \"Normal_prob\":y_pred_[:, -1], \"Prediction\":pred_value_})\n",
    "                        Train.predictions.update({\"{}_{}_{}\".format((epochs+1)* (c+1),f,h):\n",
    "                                                  (curr_pred, \n",
    "                                                   Train.result((epochs+1)*(c+1), f, h,valid_accuracy, accuracy, accuracy_, time.perf_counter() - start_time))})\n",
    "                        Train.predictions_.update({\"{}_{}_{}\".format((epochs+1)* (c+1),f,h):\n",
    "                                                  (curr_pred_, \n",
    "                                                   Train.result((epochs+1)*(c+1), f, h,valid_accuracy, accuracy, accuracy_, time.perf_counter() - start_time))})\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:22:10.511Z"
    },
    "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import itertools\n",
    "\n",
    "df_results = []\n",
    "past_scores = []\n",
    "\n",
    "class Hyperparameters:\n",
    "#    features_arr = [2, 4, 8, 16, 32, 64, 128, 256]\n",
    "#    hidden_layers_arr = [2, 4, 6, 10]\n",
    "\n",
    "    def start_training():\n",
    "\n",
    "        global df_results\n",
    "        global past_scores\n",
    "        \n",
    "        Train.predictions = {}\n",
    "        Train.predictions_ = {}\n",
    "\n",
    "        Train.results = []\n",
    "        \n",
    "        features_arr = [1] #[4, 8, 16, 32]\n",
    "        hidden_layers_arr = [1, 3]\n",
    "\n",
    "        epochs = [10]\n",
    "        lrs = [1e-2, 1e-3]\n",
    "\n",
    "        for e, h, f in itertools.product(epochs, hidden_layers_arr, features_arr):\n",
    "            print(\"Current Layer Attributes - epochs:{} hidden layers:{} features count:{}\".format(e,h,f))\n",
    "            n = network(2,h,f)\n",
    "            n.build_layers()\n",
    "            Train.train(e, n, h,f, lrs)\n",
    "            \n",
    "        dict1 = {}\n",
    "        dict1_ = {}\n",
    "        dict2 = []\n",
    "        for k, (v1, v2) in Train.predictions.items():\n",
    "            dict1.update({k: v1})\n",
    "            dict2.append(v2)\n",
    "            \n",
    "        for k, (v1_, v2) in Train.predictions_.items():\n",
    "            dict1_.update({k: v1_})\n",
    "\n",
    "        Train.predictions = dict1\n",
    "        Train.predictions_ = dict1_\n",
    "        Train.results = dict2\n",
    "        \n",
    "        df_results = pd.DataFrame(Train.results)\n",
    "        temp = df_results.set_index(['no_of_features', 'hidden_layers'])\n",
    "\n",
    "        if not os.path.isfile('dataset/scores/tf_lstm_nsl_kdd-orig_all-.pkl'):\n",
    "            past_scores = temp\n",
    "        else:\n",
    "            past_scores = pd.read_pickle(\"dataset/scores/tf_lstm_nsl_kdd-orig_all-.pkl\")\n",
    "\n",
    "        past_scores.append(temp).to_pickle(\"dataset/scores/tf_lstm_nsl_kdd-orig_all-.pkl\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:22:10.974Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%%timeit -r 10\n",
    "\n",
    "Hyperparameters.start_training()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-16T20:12:38.742950Z",
     "start_time": "2017-06-16T20:12:38.705898Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-06-16T20:12:38.748800Z",
     "start_time": "2017-06-16T20:12:38.744607Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:30.446Z"
    }
   },
   "outputs": [],
   "source": [
    "g = df_results.groupby(by=['no_of_features'])\n",
    "idx = g['test_score'].transform(max) == df_results['test_score']\n",
    "df_results[idx].sort_values(by = 'test_score', ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:31.401Z"
    }
   },
   "outputs": [],
   "source": [
    "df_results.sort_values(by = 'test_score', ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:31.858Z"
    }
   },
   "outputs": [],
   "source": [
    "pd.Panel(Train.predictions).to_pickle(\"dataset/tf_lstm_nsl_kdd_predictions-.pkl\")\n",
    "pd.Panel(Train.predictions_).to_pickle(\"dataset/tf_lstm_nsl_kdd_predictions-__.pkl\")\n",
    "\n",
    "df_results.to_pickle(\"dataset/tf_lstm_nsl_kdd_scores-.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:32.021Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import itertools\n",
    "\n",
    "def plot_confusion_matrix(cm, classes,\n",
    "                          normalize=False,\n",
    "                          title='Confusion matrix',\n",
    "                          cmap=plt.cm.Blues):\n",
    "    \"\"\"\n",
    "    This function prints and plots the confusion matrix.\n",
    "    Normalization can be applied by setting `normalize=True`.\n",
    "    \"\"\"\n",
    "    np.set_printoptions(precision=4)\n",
    "\n",
    "    plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
    "    plt.title(title)\n",
    "    plt.colorbar()\n",
    "    tick_marks = np.arange(len(classes))\n",
    "    plt.xticks(tick_marks, classes, rotation=45)\n",
    "    plt.yticks(tick_marks, classes)\n",
    "\n",
    "    if normalize:\n",
    "        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n",
    "        print(\"Normalized confusion matrix\")\n",
    "    else:\n",
    "        print('Confusion matrix, without normalization')\n",
    "\n",
    "    print(cm)\n",
    "\n",
    "    thresh = cm.max() / 2.\n",
    "    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
    "        plt.text(j, i, cm[i, j].round(4),\n",
    "                 horizontalalignment=\"center\",\n",
    "                 color=\"white\" if cm[i, j] > thresh else \"black\")\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.ylabel('True label')\n",
    "    plt.xlabel('Predicted label')\n",
    "\n",
    "def plot(actual_value, pred_value):\n",
    "    from sklearn.metrics import confusion_matrix\n",
    "    cm_2labels = confusion_matrix(y_pred = pred_value, y_true = actual_value)\n",
    "    plt.figure(figsize=[6,6])\n",
    "    plot_confusion_matrix(cm_2labels, preprocess.output_columns_2labels, normalize = False,\n",
    "                         title = Train.best_parameters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:32.253Z"
    }
   },
   "outputs": [],
   "source": [
    "plot(actual_value = Train.actual_value, pred_value = Train.pred_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:33.363Z"
    }
   },
   "outputs": [],
   "source": [
    "plot(actual_value = Train.actual_value_, pred_value = Train.pred_value_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:34.351Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#4.5 GB\n",
    "pd.Series(Train.pred_value).to_csv('LSTM_prediction_values-.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:35.120Z"
    }
   },
   "outputs": [],
   "source": [
    "past_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:35.175Z"
    }
   },
   "outputs": [],
   "source": [
    "pgb = past_scores.groupby(by=['no_of_features', 'hidden_layers'])\n",
    "pgb.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-07-17T00:51:37.935Z"
    }
   },
   "outputs": [],
   "source": [
    "pgb.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "_draft": {
   "nbviewer_url": "https://gist.github.com/7d1ace18a82178e15ece8fc5252fce88"
  },
  "anaconda-cloud": {},
  "gist": {
   "data": {
    "description": "Hyper parameter tuning",
    "public": false
   },
   "id": "7d1ace18a82178e15ece8fc5252fce88"
  },
  "kernelspec": {
   "display_name": "Python [conda env:p3]",
   "language": "python",
   "name": "conda-env-p3-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
